# Calculate/load Differential Expression metrics for all genes, load SFARI dataset:
# Gene expression data
load('./../working_data/RNAseq_ASD_4region_normalized_vst.Rdata')
# SFARI genes
SFARI_genes = read_csv('./../working_data/SFARI_genes_01-15-2019.csv')
SFARI_genes = SFARI_genes %>% inner_join(datProbes, by=c('gene-symbol'='external_gene_id')) %>%
mutate('ID' = ensembl_gene_id) %>%
dplyr::select(ID, `gene-score`, syndromic)
# Balance Groups by covariates, remove singular batches (none)
to_keep = (datMeta$Subject_ID != 'AN03345')
datMeta = datMeta[to_keep,]
datExpr = datExpr[,to_keep]
if(!file.exists('./../working_data/genes_ASD_DE_info_vst.csv')) {
# Calculate differential expression for ASD
mod = model.matrix(~ Dx, data=datMeta)
corfit = duplicateCorrelation(datExpr, mod, block=datMeta$Subject_ID)
lmfit = lmFit(datExpr, design=mod, block=datMeta$Subject_ID, correlation=corfit$consensus)
fit = eBayes(lmfit, trend=T, robust=T)
top_genes = topTable(fit, coef=2, number=nrow(datExpr))
genes_DE_info = top_genes[match(rownames(datExpr), rownames(top_genes)),] %>%
mutate('ID'=rownames(datExpr)) %>% left_join(SFARI_genes, by='ID')
write_csv(genes_DE_info, path='./../working_data/genes_ASD_DE_info_vst.csv')
rm(mod, corfit, lmfit, fit, top_genes)
} else {
genes_DE_info = read_csv('./../working_data/genes_ASD_DE_info_vst.csv')
}
genes_DE_info = genes_DE_info %>% dplyr::select(ID, logFC, AveExpr, t, P.Value, adj.P.Val,
B, `gene-score`, syndromic) %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`))
datExpr = datExpr %>% data.frame
datExpr_backup = datExpr
rm(to_keep, datProbes)
Raw data
datExpr_raw = read.csv('./../raw_data/RNAseq_ASD_datExpr.csv', row.names=1)
datExpr_raw = datExpr_raw[substr(rownames(datExpr_raw),1,3)=='ENS',
substring(colnames(datExpr_raw),2) %in% datMeta$Dissected_Sample_ID]
datExpr_raw = datExpr_raw[rowSums(datExpr_raw)>5,]
datExpr_ASD = datExpr_raw %>% dplyr::select(which(datMeta$Diagnosis_=='ASD')) %>% rowMeans
datExpr_CTL = datExpr_raw %>% dplyr::select(which(datMeta$Diagnosis_!='ASD')) %>% rowMeans
ASD_vs_CTL = data.frame('ID'=rownames(datExpr_raw), 'ASD'=datExpr_ASD+1, 'CTL'=datExpr_CTL+1) %>%
left_join(SFARI_genes, by='ID') %>% mutate(`gene-score`=as.factor(`gene-score`))
p1 = ggplotly(ASD_vs_CTL %>% ggplot(aes(x=ASD, y=CTL, color=`gene-score`)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') + geom_point(alpha=0.4) +
geom_smooth(method=lm, se=FALSE, color='gray') +
ggtitle('Mean expression by gene ASD vs CTL') + theme_minimal())
p2 = ggplotly(ASD_vs_CTL %>% dplyr::select(ID, ASD, CTL) %>% melt %>%
ggplot(aes(variable, value, fill=variable)) + geom_boxplot() +
coord_cartesian(ylim = c(0, 500)) + theme_minimal())
subplot(p1, p2, nrows=1)
remove(p1, p2, ASD_vs_CTL)
VST normalised data
datExpr_ASD = datExpr %>% dplyr::select(which(datMeta$Diagnosis_=='ASD')) %>% rowMeans
datExpr_CTL = datExpr %>% dplyr::select(which(datMeta$Diagnosis_!='ASD')) %>% rowMeans
ASD_vs_CTL = data.frame('ID'=rownames(datExpr), 'ASD'=datExpr_ASD+1, 'CTL'=datExpr_CTL+1) %>%
left_join(SFARI_genes, by='ID') %>% mutate(`gene-score`=as.factor(`gene-score`))
p1 = ggplotly(ASD_vs_CTL %>% ggplot(aes(x=ASD, y=CTL, color=`gene-score`)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') + geom_point(alpha=0.4) +
geom_smooth(method=lm, se=FALSE, color='gray') +
ggtitle('Mean expression by gene ASD vs CTL') + theme_minimal())
p2 = ggplotly(ASD_vs_CTL %>% dplyr::select(ID, ASD, CTL) %>% melt %>%
ggplot(aes(variable, value, fill=variable)) + geom_boxplot() + theme_minimal())
subplot(p1, p2, nrows=1)
remove(p1, p2, asd_ctl_mean)
By Brain Region
scatter_plot_by_lobe = function(lobe){
lobe_samples = as.numeric(datMeta$Brain_lobe==lobe)
datExpr_ASD = datExpr %>% dplyr::select(which(datMeta$Diagnosis_=='ASD' & datMeta$Brain_lobe==lobe)) %>% rowMeans
datExpr_CTL = datExpr %>% dplyr::select(which(datMeta$Diagnosis_!='ASD' & datMeta$Brain_lobe==lobe)) %>% rowMeans
ASD_vs_CTL = data.frame('ID'=rownames(datExpr), 'ASD'=datExpr_ASD+1, 'CTL'=datExpr_CTL+1) %>%
left_join(SFARI_genes, by='ID') %>% mutate(`gene-score`=as.factor(`gene-score`))
p1 = ASD_vs_CTL %>% ggplot(aes(x=ASD, y=CTL, color=`gene-score`)) +
scale_x_continuous(trans='log2') + scale_y_continuous(trans='log2') + geom_point(alpha=0.4) +
geom_smooth(method=lm, se=FALSE, color='gray') +
ggtitle(glue('Mean expression by gene ASD vs CTL in ',lobe, ' lobe')) + theme_minimal()
p2 = ASD_vs_CTL %>% dplyr::select(ID, ASD, CTL) %>% melt %>%
ggplot(aes(variable, value, fill=variable)) + geom_boxplot() + theme_minimal()
return(list(p1,p2))
}
ps = list()
for(lobe in unique(datMeta$Brain_lobe)){
ps[[lobe]] = scatter_plot_by_lobe(lobe)
}
require(grid.arrange)
grid.arrange(ps[['Frontal']][[1]], ps[['Frontal']][[2]],
ps[['Temporal']][[1]], ps[['Temporal']][[2]],
ps[['Parietal']][[1]], ps[['Parietal']][[2]],
ps[['Occipital']][[1]], ps[['Occipital']][[2]], nrow=4)
